
*+++++++++++++ THE BEGINNING +++++++++++++++++++++++++++++++++++++++

* Session setup: disable output paging, empty memory, then load the analysis dataset.
set more off
clear

use ".\Data_Tables_S5_S7_Figure_4_Coeffs.dta", clear


*__________________________________________________________________________________________________
******************************* CREATE OUTCOME AND CONTROL VARIABLES ******************************
*--------------------------------------------------------------------------------------------------

*** OUTCOME VARIABLES***

* Water-source dummies built from hv201.
* Each dummy is left missing when hv201 is missing (.) or coded 96;
* otherwise it is 1 for the listed hv201 codes and 0 for every other code.
* Each dummy is cross-tabulated against hv201 as a visual check of the recode.

* Piped water: hv201 codes 11, 12, 13
generate dmwaterpiped = inlist(hv201, 11, 12, 13) if hv201 != . & hv201 != 96
label variable dmwaterpiped "Dummy - HH has a piped water source"
tabulate hv201 dmwaterpiped

* Piped water in the dwelling or compound: hv201 codes 11, 12
generate dmwaterpiped_close = inlist(hv201, 11, 12) if hv201 != . & hv201 != 96
label variable dmwaterpiped_close "Dummy - HH has a piped water source in the dwelling or compound"
tabulate hv201 dmwaterpiped_close

* Improved water source: hv201 codes 11, 12, 13, 23, 24, 41
generate dmwaterimproved = inlist(hv201, 11, 12, 13, 23, 24, 41) if hv201 != . & hv201 != 96
label variable dmwaterimproved "Dummy - HH has an improved water source"
tabulate hv201 dmwaterimproved

* Time to the water source (minutes), taken from hv204 with two recodes:
*   hv204==996 -> 0 minutes
*   hv204==995 -> missing for now, imputed just below
* Judgement call: recode as missing if time to water is coded as '1 day or longer'
* (cannot assess how long this is, and it could vary across respondents giving this same answer).
generate timetowater = cond(hv204 == 996, 0, cond(hv204 == 995, ., hv204))

* Judgement call: for the observations set to missing above, use the median travel time
* in the household's region (hv024) / type of place of residence (hv025, rural or urban).
* NOTE(review): only hv204==995 is imputed; observations where hv204 itself is missing
* stay missing, and any other special hv204 codes are kept as-is -- confirm this is intended.
by hv024 hv025, sort: egen ttw_cellmedian = median(timetowater)
replace timetowater = ttw_cellmedian if hv204 == 995
drop ttw_cellmedian
label variable timetowater "time to get to water source (minutes)"

* Natural log of travel time. ln(0) is missing in Stata, so households recoded to
* 0 minutes above have a missing lntimetowater.
generate lntimetowater = log(timetowater)
label variable lntimetowater "Log time to get to water source (minutes)"


*** CONTROL VARIABLES AND VARIABLES ON WHICH WE CHECK FOR BALANCE***

* Household size
rename hv009 hh_cntrl_qty_HH_members

* Age of the household head and its square
rename hv220 age
label variable age "age of head of household"

generate age_sq = age^2
label variable age_sq "age of head of household, squared"

* Earlier literacy definition: looks up sh14_0# for the line number stored in hv003
* (line numbers 1-9 only); set to missing when that entry is 8 or missing.
generate dm_literate_old = 0
forvalues line = 1/9 {
	replace dm_literate_old = 1 if hv003 == `line' & sh14_0`line' == 1
	replace dm_literate_old = . if hv003 == `line' & (sh14_0`line' == 8 | sh14_0`line' == .)
}
label variable dm_literate_old "Dummy - HH head literate"

* Literacy definition used in the analysis: based on sh14_01 only; missing when sh14_01 is 8 or missing.
generate dm_literate = (sh14_01 == 1) if sh14_01 != . & sh14_01 != 8
label variable dm_literate "Dummy - HH head literate"

* The dummies below follow one pattern: 1 if the source variable satisfies the condition,
* 0 otherwise, and missing when the source variable is missing (.).

generate hh_cntrl_cropland = (sh25ab == 1) if sh25ab != .
label variable hh_cntrl_cropland "Dummy - crop land (i.e. sh25ab==1)"

generate dm_male = (hv219 == 1) if hv219 != .
label variable dm_male "Dummy - HH head male"

generate dmownhouse = (sh25aa == 1) if sh25aa != .
label variable dmownhouse "Dummy - HH owns its house"

generate dmmorethan1room = (sh27b > 1) if sh27b != .
label variable dmmorethan1room "Dummy - house has more than 1 room for sleeping"

* Missing if EITHER livestock item is missing, even when the other one equals 1.
generate dmlargelivestock = (sh25ac == 1 | sh25ad == 1) if sh25ac != . & sh25ad != .
label variable dmlargelivestock "Dummy - HH owns large livestock (cattle, camel, horse, mule, or donkey)"

* Education codes 2 and 3 count as secondary or higher; code 8 and missing become missing.
generate dmedsecorhigher = inlist(hv106_01, 2, 3) if hv106_01 != . & hv106_01 != 8
label variable dmedsecorhigher "Dummy - HH head has secondary or higher education"



*_______________________________________________________________________________________________________________________________________________________________________________________________________
******************************* REMOVE URBAN AREAS, REGION BORDERS WITH SAME DECENTRALIZATION STATUS ON BOTH SIDES, AND REGION PAIRS FOR WHICH NEITHER REGION IS IN MAIN SAMPLE FOR ANALYSIS (I.E. IN 2008-09 IFPRI SURVEY) **************************************
*-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

* Drop urban areas (hv025==1); observations with any other hv025 value are kept.
keep if hv025 != 1

* Keep only policy-change borders (dmPCB==1); this also removes observations with missing dmPCB.
keep if dmPCB == 1

* Drop region pairs containing at least one region that is not among our sample regions
* (pairs flagged by A3, A8 and A10).
* NOTE(review): the section header above describes these as pairs for which NEITHER region
* is in the main sample -- confirm which wording is correct.
drop if inlist(1, A3, A8, A10)




*_________________________________________________________________________________
******************************* REGRESSIONS **************************************
*---------------------------------------------------------------------------------

*** Set controls macro

* Household-level control variables. This list is appended to the right-hand side of the
* Table S7 regressions and is used to flag observations with complete control data.
local hh_controls ///
	dm_male ///
	dm_literate ///
	dmedsecorhigher ///
	age ///
	age_sq ///
	hh_cntrl_qty_HH_members ///
	hh_cntrl_cropland ///
	dmownhouse ///
	dmmorethan1room ///
	dmlargelivestock ///
	yourethshare

*** Locals for different sets of RHS variables
*
* The regressor lists are generated instead of typed out by hand, so the number of
* A# indicator series is controlled by one parameter and a name cannot be mistyped.
* The names and their order match the original hand-written lists:
*   M4b_lin : D, then A1..A13, A1_B..A13_B, A1_S..A13_S, A1_B_S..A13_B_S
*   M_b_squ : A1_Bsqu..A13_Bsqu
*   M4b_squ : A1_Bsqu_S..A13_Bsqu_S
* D is the regressor whose coefficient is extracted for Figure 4.
* NOTE(review): A# look like region-pair indicators and _B/_S/_Bsqu their interactions
* with other running variables -- confirm against the dataset documentation.

local n_pairs 13

local M4b_lin "D"
foreach suffix in "" "_B" "_S" "_B_S" {
	forvalues i = 1/`n_pairs' {
		local M4b_lin "`M4b_lin' A`i'`suffix'"
	}
}

local M4b_squ ""
local M_b_squ ""
forvalues i = 1/`n_pairs' {
	local M4b_squ "`M4b_squ' A`i'_Bsqu_S"
	local M_b_squ "`M_b_squ' A`i'_Bsqu"
}


**** Initialize all outreg files
*
* Each output workbook is created (option replace) with one throw-away column titled
* IGNORE, so that the table loops further down can always use append.
* Both throw-away columns carry the dmwaterpiped label in their title.

local labb : variable label dmwaterpiped
local rhs_all `M4b_lin' `M_b_squ' `M4b_squ'

regress dmwaterpiped `rhs_all', noconstant cluster(zone)
outreg2 using ".\Table_S7.xls", replace bdec(4) e(all) se label(insert) ///
	ctitle(IGNORE, `labb') sortvar(`rhs_all')

regress dm_male `rhs_all', noconstant cluster(zone)
outreg2 using ".\Table_S5.xls", replace bdec(4) e(all) se label(insert) ///
	ctitle(IGNORE, `labb') sortvar(`rhs_all')

* Snapshot of the prepared data (before any bandwidth restriction) in a temporary file.
tempfile full_DHS
save `full_DHS'

   

*TABLE S7 (PRE-DECENTRALIZATION PLACEBO ANALYSIS, ACCESS TO DRINKING WATER IN 2000)

*Note: use 75km bandwidth (same as 2008-09 IFPRI survey used in main analysis)

* Reload the saved snapshot and keep observations strictly inside the (-75, 75) window of S.
* Observations with missing S are removed as well.
use `full_DHS', clear
keep if S < 75 & S > -75

* One regression per water outcome, with the household controls added to the RHS.
* Each result is appended as a new column to Table_S7.xls, titled with the outcome's label.
foreach outcome of varlist dmwaterpiped dmwaterimproved timetowater {
	local labb : variable label `outcome'
	quietly regress `outcome' `M4b_lin' `M_b_squ' `M4b_squ' `hh_controls', noconstant cluster(zone)
	outreg2 using ".\Table_S7.xls", append bdec(4) e(all) se label(insert) ///
		ctitle(Table S7, `labb') sortvar(`M4b_lin' `M_b_squ' `M4b_squ')
}



*TABLE S7 OUTCOME VARIABLES MEANS (EXPORT)

* fullcontrols = 1 when none of the household controls is missing (.), 0 otherwise.
* The condition is accumulated over the control list and evaluated once.
local any_ctrl_missing "0"
foreach ctrl of local hh_controls {
	local any_ctrl_missing "`any_ctrl_missing' | `ctrl' == ."
}
generate fullcontrols = !(`any_ctrl_missing')
* NOTE(review): the label text probably means "has data" rather than "as data".
label variable fullcontrols "Dummy - observations as data on all control variables"

* Summary statistics of the three outcomes for observations with complete controls.
estpost summarize dmwaterpiped dmwaterimproved timetowater if fullcontrols == 1
estout using ".\Table_S7_Summary_Statistics_Row.xls", cells("count mean sd min max") label replace



*TABLE S5 (BALANCE ON HOUSEHOLD CHARACTERISTICS IN 2000)

* Each household characteristic is regressed on the RHS sets only (no extra controls).
* Each result is appended as a column to Table_S5.xls, titled with the variable's label.
* NOTE(review): the dmlargelivestock label contains commas, which ctitle() may treat as
* separators between title rows -- check the exported column header.
foreach balancechecks of varlist dm_male dm_literate dmedsecorhigher age hh_cntrl_qty_HH_members hh_cntrl_cropland dmownhouse dmmorethan1room dmlargelivestock HHavg_hc72 yourethshare {
	local labb : variable label `balancechecks'
	quietly regress `balancechecks' `M4b_lin' `M_b_squ' `M4b_squ', noconstant cluster(zone)
	outreg2 using ".\Table_S5.xls", append bdec(4) e(all) se label(insert) ///
		ctitle(Table S5, `labb') sortvar(`M4b_lin' `M_b_squ' `M4b_squ')
}



*FIGURE 4 (PRODUCE DATASET FOR USE IN R -- THE PROGRAM USED TO CREATE THE DHS BALANCE TEST PREDICTION FIGURE)

* Age of the household head in units of 10 years.
* A missing age gives a missing age10 automatically, so no separate recode is needed.
gen age10=age/10
label var age10 "age"

set more off
local controlstocheck "dm_male dm_literate dmedsecorhigher age10 hh_cntrl_qty_HH_members hh_cntrl_cropland dmownhouse dmmorethan1room dmlargelivestock HHavg_hc72 yourethshare"

* One row per balance variable; columns are: coefficient on D, its standard error, row index.
* The row count is derived from the list so that both stay in sync when the list is edited.
local ncontrols : word count `controlstocheck'
mat results = J(`ncontrols',3,0)

* Start from an empty eststo list: estimates left over from an earlier (or aborted) run
* would otherwise be exported by esttab alongside the models estimated below.
eststo clear

local a=1
foreach var of varlist `controlstocheck' {
	* Replace the variable label with the variable's own name
	* (presumably so the exported output is keyed by variable name -- confirm).
	label variable `var' "`var'"
	eststo: quietly reg `var' `M4b_lin' `M_b_squ' `M4b_squ', nocon cluster(zone)
	mat results[`a',1] = _b[D]
	mat results[`a',2] = _se[D]
	mat results[`a',3] = `a'
	local ++a
	}

* Export the full regression table (CSV) and the compact coefficient matrix, then
* release the stored estimates.
esttab using Figure_4_Coefficients.csv, b(3) se(3) starlevels(* 0.1 ** .05 *** .01) replace
mat2txt, matrix(results) saving(Figure_4_Coefficients) replace 
eststo clear
